{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "74ba2e8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SentenceTransformer(\n",
       "  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel \n",
       "  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
       "  (2): Normalize()\n",
       ")"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sentence_transformers import SentenceTransformer\n",
    "import torch\n",
    "\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "\n",
    "model = SentenceTransformer('all-MiniLM-L6-v2', device=device)\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "784f6fda",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'lM5ZgEQNTkO8phtT5qZAHQ', 'version': {'number': '8.10.0', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'e338da74c79465dfdc204971e600342b0aa87b6b', 'build_date': '2023-09-07T08:16:21.960703010Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n"
     ]
    }
   ],
   "source": [
    "from elasticsearch import Elasticsearch\n",
    "\n",
    "ELASTCSEARCH_CERT_PATH = \"/Users/liuxg/elastic/elasticsearch-8.10.0/config/certs/http_ca.crt\"\n",
    " \n",
    "client = Elasticsearch(  ['https://localhost:9200'],\n",
    "    basic_auth = ('elastic', 'vXDWYtL*my3vnKY9zCfL'),\n",
    "    ca_certs = ELASTCSEARCH_CERT_PATH,\n",
    "    verify_certs = True)\n",
    "print(client.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "cc2066f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'book_index'})"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "INDEX_NAME = \"book_index\"\n",
    "\n",
    "if es.indices.exists(index=INDEX_NAME):\n",
    "    print(\"Deleting existing %s\" % INDEX_NAME)\n",
    "    client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
    "\n",
    "# Define the mapping\n",
    "mappings = {\n",
    "    \"properties\": {\n",
    "        \"title_vector\": {\n",
    "            \"type\": \"dense_vector\",\n",
    "            \"dims\": 384,\n",
    "            \"index\": \"true\",\n",
    "            \"similarity\": \"cosine\"\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "\n",
    "# Create the index\n",
    "client.indices.create(index = INDEX_NAME, mappings = mappings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "95395d32",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'errors': False, 'took': 55, 'items': [{'index': {'_index': 'book_index', '_id': 'cL8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'cb8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'cr8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'c78-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'dL8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'db8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 5, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'dr8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'd78-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'eL8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'book_index', '_id': 'eb8-DYsByaLf0EuT882D', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}]})"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "# Load data into a JSON object\n",
    "with open('books.json') as f:\n",
    "   data_json = json.load(f)\n",
    " \n",
    "actions = []\n",
    "for book in data_json:\n",
    "    actions.append({\"index\": {\"_index\": INDEX_NAME}})\n",
    "    # Transforming the title into an embedding using the model\n",
    "    book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
    "    actions.append(book)\n",
    "client.bulk(index=INDEX_NAME, operations=actions)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "58df2f76",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pretty_response(response):\n",
    "    for hit in response['hits']['hits']:\n",
    "        id = hit['_id']\n",
    "        publication_date = hit['_source']['publish_date']\n",
    "        score = hit['_score']\n",
    "        title = hit['_source']['title']\n",
    "        summary = hit['_source']['summary']\n",
    "        publisher = hit[\"_source\"][\"publisher\"]\n",
    "        num_reviews = hit[\"_source\"][\"num_reviews\"]\n",
    "        authors = hit[\"_source\"][\"authors\"]\n",
    "        pretty_output = (f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nPublisher: {publisher}\\nReviews: {num_reviews}\\nAuthors: {authors}\\nScore: {score}\")\n",
    "        print(pretty_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e1b57405",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ID: eL8-DYsByaLf0EuT882D\n",
      "Publication date: 2008-05-15\n",
      "Title: JavaScript: The Good Parts\n",
      "Summary: A deep dive into the parts of JavaScript that are essential to writing maintainable code\n",
      "Publisher: oreilly\n",
      "Reviews: 51\n",
      "Authors: ['douglas crockford']\n",
      "Score: 0.80752474\n",
      "\n",
      "ID: dL8-DYsByaLf0EuT882D\n",
      "Publication date: 2015-03-27\n",
      "Title: You Don't Know JS: Up & Going\n",
      "Summary: Introduction to JavaScript and programming as a whole\n",
      "Publisher: oreilly\n",
      "Reviews: 36\n",
      "Authors: ['kyle simpson']\n",
      "Score: 0.6946183\n",
      "\n",
      "ID: db8-DYsByaLf0EuT882D\n",
      "Publication date: 2018-12-04\n",
      "Title: Eloquent JavaScript\n",
      "Summary: A modern introduction to programming\n",
      "Publisher: no starch press\n",
      "Reviews: 38\n",
      "Authors: ['marijn haverbeke']\n",
      "Score: 0.6617909\n",
      "\n",
      "ID: cL8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-10-29\n",
      "Title: The Pragmatic Programmer: Your Journey to Mastery\n",
      "Summary: A guide to pragmatic programming for software engineers and developers\n",
      "Publisher: addison-wesley\n",
      "Reviews: 30\n",
      "Authors: ['andrew hunt', 'david thomas']\n",
      "Score: 0.61159486\n",
      "\n",
      "ID: eb8-DYsByaLf0EuT882D\n",
      "Publication date: 2012-06-27\n",
      "Title: Introduction to the Theory of Computation\n",
      "Summary: Introduction to the theory of computation and complexity theory\n",
      "Publisher: cengage learning\n",
      "Reviews: 33\n",
      "Authors: ['michael sipser']\n",
      "Score: 0.58697784\n",
      "\n",
      "ID: d78-DYsByaLf0EuT882D\n",
      "Publication date: 2011-05-13\n",
      "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n",
      "Summary: A guide to professional conduct in the field of software engineering\n",
      "Publisher: prentice hall\n",
      "Reviews: 20\n",
      "Authors: ['robert c. martin']\n",
      "Score: 0.57042736\n",
      "\n",
      "ID: dr8-DYsByaLf0EuT882D\n",
      "Publication date: 1994-10-31\n",
      "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n",
      "Summary: Guide to design patterns that can be used in any object-oriented language\n",
      "Publisher: addison-wesley\n",
      "Reviews: 45\n",
      "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n",
      "Score: 0.5617569\n",
      "\n",
      "ID: c78-DYsByaLf0EuT882D\n",
      "Publication date: 2008-08-11\n",
      "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n",
      "Summary: A guide to writing code that is easy to read, understand and maintain\n",
      "Publisher: prentice hall\n",
      "Reviews: 55\n",
      "Authors: ['robert c. martin']\n",
      "Score: 0.55407417\n",
      "\n",
      "ID: cr8-DYsByaLf0EuT882D\n",
      "Publication date: 2020-04-06\n",
      "Title: Artificial Intelligence: A Modern Approach\n",
      "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n",
      "Publisher: pearson\n",
      "Reviews: 39\n",
      "Authors: ['stuart russell', 'peter norvig']\n",
      "Score: 0.5461982\n",
      "\n",
      "ID: cb8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-05-03\n",
      "Title: Python Crash Course\n",
      "Summary: A fast-paced, no-nonsense guide to programming in Python\n",
      "Publisher: no starch press\n",
      "Reviews: 42\n",
      "Authors: ['eric matthes']\n",
      "Score: 0.536102\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_23541/2388497951.py:1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n",
      "  response = client.search(index=\"book_index\", body={\n"
     ]
    }
   ],
   "source": [
    "response = client.search(index=\"book_index\", body={\n",
    "    \"knn\": {\n",
    "      \"field\": \"title_vector\",\n",
    "      \"query_vector\": model.encode(\"Best javascript books?\"),\n",
    "      \"k\": 10,\n",
    "      \"num_candidates\": 100\n",
    "    }\n",
    "})\n",
    "\n",
    "pretty_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "278287bd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ID: cL8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-10-29\n",
      "Title: The Pragmatic Programmer: Your Journey to Mastery\n",
      "Summary: A guide to pragmatic programming for software engineers and developers\n",
      "Publisher: addison-wesley\n",
      "Reviews: 30\n",
      "Authors: ['andrew hunt', 'david thomas']\n",
      "Score: 0.61159486\n",
      "\n",
      "ID: dr8-DYsByaLf0EuT882D\n",
      "Publication date: 1994-10-31\n",
      "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n",
      "Summary: Guide to design patterns that can be used in any object-oriented language\n",
      "Publisher: addison-wesley\n",
      "Reviews: 45\n",
      "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n",
      "Score: 0.5617569\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_23541/56853983.py:1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n",
      "  response = client.search(index=INDEX_NAME, body={\n"
     ]
    }
   ],
   "source": [
    "response = client.search(index=INDEX_NAME, body={\n",
    "    \"knn\": {\n",
    "      \"field\": \"title_vector\",\n",
    "      \"query_vector\": model.encode(\"Best javascript books?\"),\n",
    "      \"k\": 10,\n",
    "      \"num_candidates\": 100,\n",
    "      \"filter\": {\n",
    "          \"term\": {\n",
    "              \"publisher.keyword\": \"addison-wesley\"\n",
    "          }\n",
    "      }\n",
    "    }\n",
    "})\n",
    "\n",
    "pretty_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "011ae0cf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ID: cL8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-10-29\n",
      "Title: The Pragmatic Programmer: Your Journey to Mastery\n",
      "Summary: A guide to pragmatic programming for software engineers and developers\n",
      "Publisher: addison-wesley\n",
      "Reviews: 30\n",
      "Authors: ['andrew hunt', 'david thomas']\n",
      "Score: 0.61159486\n",
      "\n",
      "ID: d78-DYsByaLf0EuT882D\n",
      "Publication date: 2011-05-13\n",
      "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n",
      "Summary: A guide to professional conduct in the field of software engineering\n",
      "Publisher: prentice hall\n",
      "Reviews: 20\n",
      "Authors: ['robert c. martin']\n",
      "Score: 0.57042736\n",
      "\n",
      "ID: dr8-DYsByaLf0EuT882D\n",
      "Publication date: 1994-10-31\n",
      "Title: Design Patterns: Elements of Reusable Object-Oriented Software\n",
      "Summary: Guide to design patterns that can be used in any object-oriented language\n",
      "Publisher: addison-wesley\n",
      "Reviews: 45\n",
      "Authors: ['erich gamma', 'richard helm', 'ralph johnson', 'john vlissides']\n",
      "Score: 0.5617569\n",
      "\n",
      "ID: c78-DYsByaLf0EuT882D\n",
      "Publication date: 2008-08-11\n",
      "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n",
      "Summary: A guide to writing code that is easy to read, understand and maintain\n",
      "Publisher: prentice hall\n",
      "Reviews: 55\n",
      "Authors: ['robert c. martin']\n",
      "Score: 0.55407417\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_23541/1445678156.py:1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n",
      "  response = client.search(index=\"book_index\", body={\n"
     ]
    }
   ],
   "source": [
    "response = client.search(index=\"book_index\", body={\n",
    "    \"knn\": {\n",
    "      \"field\": \"title_vector\",\n",
    "      \"query_vector\": model.encode(\"Best javascript books?\"),\n",
    "      \"k\": 10,\n",
    "      \"num_candidates\": 100,\n",
    "      \"filter\": {\n",
    "          \"bool\": {\n",
    "              \"should\": [\n",
    "                  {\n",
    "                    \"term\": {\n",
    "                        \"publisher.keyword\": \"addison-wesley\"\n",
    "                    }\n",
    "                  },\n",
    "                  {\n",
    "                    \"term\": {\n",
    "                        \"authors.keyword\": \"robert c. martin\"\n",
    "                    }\n",
    "                  }\n",
    "              ],\n",
    "\n",
    "          }\n",
    "      }\n",
    "    }\n",
    "})\n",
    "\n",
    "pretty_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "6bb7d2dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ObjectApiResponse({'took': 0, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}})"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "7b8ff5a6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ID: cb8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-05-03\n",
      "Title: Python Crash Course\n",
      "Summary: A fast-paced, no-nonsense guide to programming in Python\n",
      "Publisher: no starch press\n",
      "Reviews: 42\n",
      "Authors: ['eric matthes']\n",
      "Score: None\n",
      "\n",
      "ID: cL8-DYsByaLf0EuT882D\n",
      "Publication date: 2019-10-29\n",
      "Title: The Pragmatic Programmer: Your Journey to Mastery\n",
      "Summary: A guide to pragmatic programming for software engineers and developers\n",
      "Publisher: addison-wesley\n",
      "Reviews: 30\n",
      "Authors: ['andrew hunt', 'david thomas']\n",
      "Score: None\n",
      "\n",
      "ID: eb8-DYsByaLf0EuT882D\n",
      "Publication date: 2012-06-27\n",
      "Title: Introduction to the Theory of Computation\n",
      "Summary: Introduction to the theory of computation and complexity theory\n",
      "Publisher: cengage learning\n",
      "Reviews: 33\n",
      "Authors: ['michael sipser']\n",
      "Score: None\n",
      "\n",
      "ID: d78-DYsByaLf0EuT882D\n",
      "Publication date: 2011-05-13\n",
      "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n",
      "Summary: A guide to professional conduct in the field of software engineering\n",
      "Publisher: prentice hall\n",
      "Reviews: 20\n",
      "Authors: ['robert c. martin']\n",
      "Score: None\n",
      "\n",
      "ID: cr8-DYsByaLf0EuT882D\n",
      "Publication date: 2020-04-06\n",
      "Title: Artificial Intelligence: A Modern Approach\n",
      "Summary: Comprehensive introduction to the theory and practice of artificial intelligence\n",
      "Publisher: pearson\n",
      "Reviews: 39\n",
      "Authors: ['stuart russell', 'peter norvig']\n",
      "Score: None\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/kw/pvmj1zgs44l8t32cs8blf6sc0000gn/T/ipykernel_23541/2859249433.py:1: DeprecationWarning: The 'body' parameter is deprecated and will be removed in a future version. Instead use individual parameters.\n",
      "  response = client.search(index=\"book_index\", body={\n"
     ]
    }
   ],
   "source": [
    "response = client.search(index=\"book_index\", body={\n",
    "    \"query\": {\n",
    "        \"match\": {\n",
    "            \"summary\": \"python\"\n",
    "        }\n",
    "    },\n",
    "    \"knn\": {\n",
    "        \"field\": \"title_vector\",\n",
    "        # generate embedding for query so it can be compared to `title_vector`\n",
    "        \"query_vector\" : model.encode(\"python programming\").tolist(),\n",
    "        \"k\": 5,\n",
    "        \"num_candidates\": 10\n",
    "    },\n",
    "    \"rank\": {\n",
    "        \"rrf\": {\n",
    "            \"window_size\": 100,\n",
    "            \"rank_constant\": 20\n",
    "        }\n",
    "    }\n",
    "})\n",
    "\n",
    "pretty_response(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3028d038",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
